**************************************************************************************************************************************************
******************* Code for 'Are Investment Tax Breaks Effective: Australian Evidence' *******************
***************************************************** Code for DD data construction ******************************************************************

**************************************************DATE: Jun 2024 ******************************************************************************
*********************************************************************************************************************************************
*** Code structure
*00. Preliminaries and globals
*01. Make BIT income variables
*02. CAPEX data initial cleaning
*03. Bring in indicator if company in the group



************* 00. Preliminaries **********					  
						  
*set maxvar 20000
* Begin with an empty session.
clear all

* Point the ado-file search path at the project locations (blank in this copy
* of the file), then rebuild the Mata library index.
sysdir set PERSONAL ""
sysdir set PLUS ""
mata: mata mlib index

* Input and output folders (blank in this copy of the file) and the merge key.
global savefile "" 
global data ""
global ids id_bg // use enterprise group merging


************* 01. Make BIT income variable **********
* Build the enterprise-group (EG) level income file that section 02 merges
* onto the firm-level CAPEX data by id_bg and tsid.
use "$data\bit_frame_bg_21.dta", clear // load the EG-level raw dataset holding the income variables
keep tsid income* id_bg                // merge keys plus every income* variable
* -replace- lets the script be re-run; without it the second run stops
* because the output file already exists.
save "$savefile\incomes_eg.dta", replace

************* 02. CAPEX data initial cleaning **********
use "$data\capex_bit_frame_20.dta", clear // CAPEX data at id level

** Initial cleaning
* Rows with a blank id will not have any BIT or frame data, so they go first.
drop if id == ""

** Some firmographics
* Fill gaps in latest_sisca08 with the largest value seen for the same id.
bysort id: egen max_sisca = max(latest_sisca08)
replace latest_sisca08 = max_sisca if latest_sisca08 == .

* 0/1 flags derived from latest_sisca08 (a missing code yields 0 in each flag)
gen gov = latest_sisca08 == 3000 // might want to drop these
gen non_fin_prof = inlist(latest_sisca08, 4000, 1001, 1009)
gen non_prof = latest_sisca08 == 5000


* Public firms are excluded from the sample.
drop if gov == 1

** Fix up capex anzsic variable - checking it is consistent over time
destring ranzsic06, replace force // -force- turns non-numeric codes into missing
* Count the distinct non-missing ranzsic06 codes each id reports. Missing codes
* (including those just created by -force-) are excluded so that they are not
* counted as an extra industry; this matches the BLADE check below.
bysort id ranzsic06: g first_obs = (_n==1 & ranzsic06!=.)
bysort id : egen ind_counts = total(first_obs)
tab ind_counts
* Same for BLADE anzsic
bysort id latest_anzsic06: g first_obs_fr = (_n==1 & latest_anzsic06!=.)
bysort id : egen ind_counts_fr = total(first_obs_fr)
tab ind_counts_fr

** Make sure the panel has an anzsic code in every row
g industry = latest_anzsic06 // use panel version if firm ever has one
* minmode: when several codes tie for most frequent, take the smallest
bysort id: egen mode_ind = mode(latest_anzsic06), minmode
replace industry = mode_ind if industry==. // cast back if extra years
bysort id: egen mode_ind_capex = mode(ranzsic06), minmode
replace industry = mode_ind_capex if industry==. // where still no industry, use most frequent r_capex
count if industry ==. // any missing left?
** Checking bg_id is the same over time
* Count the distinct non-blank bg_id values each id reports.
bysort id bg_id: g first_bg_fr = (_n==1 & bg_id!="")
bysort id : egen bg_counts_fr = total(first_bg_fr)
tab bg_counts_fr // checking change in bg_id over time
g mult_bg = (bg_counts_fr>1 & bg_counts_fr!=.) // flag ids whose bg_id changes

*Generate indicator for divisions
* Each letter is assigned when industry lies strictly between the matching
* entries of div_above and div_below. Codes that fall in the gaps between
* bands, and missing codes, keep a blank division.
* NOTE(review): presumably these are the ANZSIC 2006 division letters - confirm.
gen str1 division = ""
replace division = "A" if industry < 600

local div_letter "B C D E F G H I J K L M N O P Q R S"
local div_above  "599 1099 2599 2999 3299 3899 4399 4599 5399 6199 6599 6799 7199 7499 7999 8399 8899 9399"
local div_below  "1100 2600 3000 3300 3900 4400 4600 5400 6200 6500 6800 7100 7400 8000 8300 8800 9300 9999"
local n_div : word count `div_letter'

forvalues k = 1/`n_div' {
	local letter : word `k' of `div_letter'
	local above  : word `k' of `div_above'
	local below  : word `k' of `div_below'
	replace division = "`letter'" if industry > `above' & industry < `below'
}

* 0/1 flag for division B
gen mining = division == "B"
* year is a copy of tsid; date numbers the distinct (year, quarter) pairs 1, 2, ...
gen year = tsid
egen date = group(year quarter)

*Merge in EG level income variable
******************************
* Merge key: the bg_id where there is one, otherwise the row's own id.
g id_bg = bg_id
replace id_bg = id if bg_id==""

* keep(master match) discards income rows that have no firm-level counterpart
* (the former "drop if _merge == 2"); nogenerate skips creating _merge.
merge m:1 id_bg tsid using "$savefile\incomes_eg.dta", keep(master match) nogenerate
* -save- has no -clear- option, so the previous call stopped with an error and
* never wrote this file; -replace- overwrites the output of an earlier run.
save "$savefile\capex_bit_frame_eginc_20", replace // save version of firm level data with eg income in it

************* 03. Bring in indicator if company in the group **********

* Fiscal-year file suffixes 0102, 0203, ..., 1920, assembled from the
* two-digit start and end years.
local yearlist ""
forvalues endyr = 2/20 {
	local fy = string(`endyr' - 1, "%02.0f") + string(`endyr', "%02.0f")
	local yearlist "`yearlist' `fy'"
}

* For every fiscal year, load the raw BLADE BIT file, keep the reporting
* indicators (company, trust, partnership and individual forms) together with
* id and tsid, and store the result as a temporary file.
* NOTE(review): unlike the other inputs, this path carries no $data prefix - confirm the intended folder.
foreach fileyear of local yearlist {
	use "\blade2021_bit_id_`fileyear'.dta", clear
	display "`fileyear'"
	keep id tsid bit_comp_* bit_trust_* bit_part_* bit_ind_*
	save "$savefile\temp`fileyear'.dta", replace
}
** merge them
* Begin with the first fiscal year and attach each later year's indicators by
* id, giving one wide file. Variables present on both sides (such as tsid)
* keep the value already in memory.
local yearlist2 " 0203 0304 0405 0506 0607 0708 0809 0910 1011 1112 1213 1314 1415 1516 1617 1718 1819 1920"

use "$savefile\temp0102.dta", clear
foreach fileyear of local yearlist2 {
	merge m:1 id using "$savefile\temp`fileyear'.dta", nogenerate
	display "`fileyear'"
}
save "$savefile\tempAllYearsWide.dta", replace
** Merge onto main data
* Reduce the main file to one row per id (the row with the latest date); it
* only serves as the list of ids to keep.
use "$savefile\capex_bit_frame_eginc_20", clear
keep id date
bysort id (date): keep if _n == _N // keep one obs each
* Bring together with the structure indicators; ids that appear only in the
* indicator file are discarded.
merge 1:m id using "$savefile\tempAllYearsWide.dta", keep(master match) nogenerate
drop tsid date
// Stata doesn't like fiscal years or leading zeros, so convert to common integer format, ie, "0405" renamed to "5"
rename *_*_(#)(#)(#)(#) *_*_(#)[5](#)[6]  
rename *_*_(##) *_*_(#) 

* Turn back to long: one row per id and year, where year is the number left
* in the variable suffix by the renames above.
reshape long bit_comp_ bit_trust_ bit_part_ bit_ind_, i(id) j(year)
sort id year
replace year = year + 2000 // e.g. 5 becomes 2005
save "$savefile\tempAllYearsLong.dta", replace

* Attach the indicators (still in memory) to the investment etc. data by id
* and year; indicator rows with no investment record are discarded.
merge 1:m id year using "$savefile\capex_bit_frame_eginc_20", keep(match using) nogenerate
* Discard ids that contain "CX", then build a numeric firm id by stripping the
* characters I and D from id.
drop if strpos(id, "CX") != 0
destring id, generate(firmid) ignore("ID")
* Keep a single row per id and date; -force- is needed because the discarded
* rows may differ in other variables.
duplicates drop id date, force
save "$savefile\capex_bit_frame_eginc_COY_20.dta", replace


************************















